import pyaudio
import wave
import numpy as np
import rospy
from audio_common_msgs.msg import AudioData
from std_msgs.msg import String
from struct import pack
import os
import sys
import itertools
import glob
import argparse
from gmm_code.interface import ModelInterface
import numpy as np
from gmm_code.features import get_feature
from array import array

# Audio capture settings. None of these names is referenced in the visible
# part of this file; MyAudio reads its sample rate and channel count from the
# ROS parameter server instead.
CHUNK = 1024  # presumably frames per buffer for a PyAudio stream -- TODO confirm
FORMAT = pyaudio.paInt16  # PyAudio sample-format constant for 16-bit integers
CHANNELS = 1
RATE = 16000
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "audio.wav"


class MyAudio(object):
    """Capture audio from the ``/audio`` topic and publish the recognised speaker.

    Constructing an instance is blocking: it subscribes to ``/audio``, buffers
    the incoming samples for roughly five seconds, writes them to
    ``human_test.wav``, runs the speaker model on them and publishes the
    result on the ``recognition`` topic as ``"bear <label>"``.

    Attributes:
        sample_rate: Value of the ``audio_capture/sample_rate`` ROS parameter.
        depth: Fixed to 1; not used by any method of this class.
        channels: Value of the ``audio_capture/channels`` ROS parameter.
        pub: Publisher for the ``recognition`` topic (``std_msgs/String``).
        audio_sub: Subscriber for the ``/audio`` topic; unregistered once the
            constructor has finished recording.
        audio_data: List of decoded samples as Python ints in ``0..65535``.
        aa: ``numpy.uint16`` array snapshot of ``audio_data`` taken when the
            recording window ends (set by :meth:`save_data`).
        speaker_model: Path of the model file used by :meth:`predict`.
    """

    # Class-level fallback kept for backward compatibility only; __init__
    # binds a fresh per-instance list so instances never share this one.
    audio_data = []

    _POLL_HZ = 100          # polling frequency of the recording wait loop
    _RECORD_TICKS = 500     # ticks to wait: 500 ticks at 100 Hz is about 5 s
    _WAV_PATH = 'human_test.wav'
    _SAMPLE_WIDTH = 2       # bytes per sample written to the WAV header

    def __init__(self):
        """Read the capture parameters, record one utterance and classify it.

        Raises:
            KeyError: If a required ``audio_capture/*`` parameter is not set
                (raised by ``rospy.get_param`` without a default).
        """
        self.sample_rate = rospy.get_param('audio_capture/sample_rate')
        self.depth = 1  # rospy.get_param('audio_capture/depth')
        self.channels = rospy.get_param('audio_capture/channels')
        self.pub = rospy.Publisher('recognition', String, queue_size=1, tcp_nodelay=True)
        # Bind the buffer before subscribing so that the first callback can
        # never append to the shared class-level list.
        self.audio_data = []
        self.audio_sub = rospy.Subscriber('/audio', AudioData, self.recode_data)
        print('Please begin to speak ,for 5 s')
        try:
            self.save_data()
        finally:
            # Without this the callback keeps this instance alive and keeps
            # growing audio_data forever; the entry point creates a new
            # MyAudio for every pass, so stale subscribers would pile up.
            self.audio_sub.unregister()

    def record_to_file(self, data):
        """Write ``data`` to ``human_test.wav`` as 16-bit PCM and print its length.

        The WAV header uses ``self.channels`` and ``self.sample_rate``.

        Args:
            data: Either a numpy array whose raw buffer is the frame data
                (``save_data`` passes a ``uint16`` array), or an iterable of
                bytes-like chunks that are concatenated.
        """
        if isinstance(data, np.ndarray):
            # One bulk copy of the array buffer instead of joining one numpy
            # scalar per sample.
            frames = data.tobytes()
        else:
            frames = b''.join(data)

        wf = wave.open(self._WAV_PATH, 'wb')
        try:
            wf.setnchannels(self.channels)
            wf.setsampwidth(self._SAMPLE_WIDTH)
            wf.setframerate(self.sample_rate)
            wf.writeframes(frames)
        finally:
            # Release the file handle even if a header setter or the write fails.
            wf.close()
        print(len(data))

    def recode_data(self, msg):
        """Subscriber callback: decode ``msg.data`` and append it to ``audio_data``.

        Every complete pair of bytes is combined little-endian (low byte
        first) into one unsigned 16-bit sample. The number of samples follows
        the payload length, so short messages no longer raise ``IndexError``
        and long messages are no longer truncated to 160 samples. A trailing
        odd byte is ignored.

        Args:
            msg: Object whose ``data`` attribute is a bytes-like object or a
                sequence of integer byte values.
        """
        payload = msg.data
        if isinstance(payload, (bytes, bytearray)):
            # Indexing a bytearray yields ints on both Python 2 and Python 3.
            payload = bytearray(payload)

        usable = len(payload) - (len(payload) % 2)
        self.audio_data.extend(
            (payload[k] & 0xff) | ((payload[k + 1] & 0xff) << 8)
            for k in range(0, usable, 2)
        )

    def save_data(self):
        """Wait for the recording window, then save and classify the audio.

        Polls at 100 Hz for 500 ticks while the subscriber callback fills
        ``audio_data``. Returns early without saving or predicting if ROS is
        shut down during the wait, or if no samples were received at all.
        """
        rate = rospy.Rate(self._POLL_HZ)
        for _ in range(self._RECORD_TICKS):
            if rospy.is_shutdown():
                return
            rate.sleep()
        if rospy.is_shutdown():
            return

        # Copy first: the subscriber callback runs on another thread and may
        # still be appending while numpy walks the list.
        snapshot = list(self.audio_data)
        # NOTE(review): samples stay unsigned here. The bytes written to the
        # WAV file are unaffected, but get_feature() receives values in
        # 0..65535 rather than signed PCM -- confirm this matches how the
        # model was trained.
        self.aa = np.array(snapshot, dtype=np.uint16)
        if not snapshot:
            rospy.logwarn('No audio received on /audio; skipping recognition')
            return
        self.record_to_file(self.aa)
        self.predict(self.aa)

    def task_predict(self, input_model, data):
        """Load the model at ``input_model`` and return its label for ``data``.

        Args:
            input_model: Path passed to ``ModelInterface.load``.
            data: Audio samples passed to ``get_feature`` together with the
                integer sample rate.

        Returns:
            The label returned by the model's ``predict``; the accompanying
            score is discarded.
        """
        m = ModelInterface.load(input_model)
        fs = int(self.sample_rate)
        feat = get_feature(fs, data)
        label, _score = m.predict(feat)
        return label

    def predict(self, data):
        """Classify ``data`` with the speaker model and publish ``"bear <label>"``.

        The result string is printed and published on the ``recognition``
        topic.

        Args:
            data: Audio samples forwarded to :meth:`task_predict`.
        """
        self.speaker_model = '/home/banana/human_voice/src/bear_voice_recognition/script/model/self_txt2.out'
        speaker_label = self.task_predict(self.speaker_model, data)
        output_str = "bear %s" % (speaker_label)
        print(output_str)
        self.pub.publish(output_str)


def main():
    """Initialise the ROS node and run one record-and-recognise pass.

    Constructing ``MyAudio`` performs the whole pass inside its constructor,
    so the instance is not kept; ``'End'`` is printed once it returns.
    """
    rospy.init_node("node_name", anonymous=True)
    MyAudio()
    print('End')


if __name__ == '__main__':
    # Repeat record-and-recognise passes until ROS is shut down. A bare
    # `while True` never observes the shutdown request, so the process could
    # only end through an unhandled exception.
    try:
        while not rospy.is_shutdown():
            main()
    except rospy.ROSInterruptException:
        # Raised by rospy sleeps when shutdown is requested mid-pass;
        # this is the normal way for the node to exit.
        pass

